#### PREAMBLE ####

library(ggplot2)
library(geosphere)
library(dplyr)
library(data.table)
library(tidyr)
library(zoo)
library(sf)
library(ggspatial)
library(RColorBrewer)
library(estimatr)
library(broom)
library(texreg)
library(did)


#### TEMPLATE ####
goldenScatterCAtheme <- theme(
  panel.background = element_rect(fill = "white"),
  aspect.ratio = ((1 + sqrt(5))/2)^(-1),
  axis.ticks.length = unit(0.5, "char"),
  axis.line.x.top = element_line(size = 0.2),
  axis.line.x.bottom = element_line(size = 0.2),
  axis.ticks.x = element_line(size = 0.2),
  axis.text.x = element_text(color = "black", size = 10),
  axis.title.x = element_text(size = 10, 
                              margin = margin(t = 7.5, r = 0, b = 0, l = 0)),
  axis.ticks.y = element_blank(),
  axis.text.y = element_text(color = "black", size = 10,
                             margin = margin(t = 0, r = -4, b = 0, l = 0)),
  axis.title.y = element_text(size = 10,
                              margin = margin(t = 0, r = 7.5, b = 0, l = 0)),
  #legend.key = element_rect(fill = NA, color = NA),
  panel.grid.major.x = element_blank(),
  panel.grid.major.y = element_line(color = "gray45", size = 0.2),
  strip.background = element_blank(),
  strip.text.x = element_text(size = 8),
  strip.text.y = element_blank(),
  strip.placement = "outside",
  panel.spacing.x = unit(1.25, "lines"),
  panel.spacing.y = unit(1, "lines")
)
class(goldenScatterCAtheme)
#### LOAD DATA ####

set.seed(20230711)

#load main data
df <- read.csv("Outputs/Analysis all muns.csv", stringsAsFactors = F, na.strings = c("", "NA"))
df$INEGI <- sprintf("%05d", df$INEGI)
df$INEGI <- as.character(df$INEGI)
df$time <- as.Date(df$time)

#### TRANSFORMATION ####

#states treated by US trade restrictions
unique(df$State[df$ustrade==1])
df$ustrade[df$time>=as.Date("2016-06-01") & df$trade==1 & (df$State=="MICHOACAN")] <- 1
#updating ustrade.
df$ustrade <- df$ustrade * df$trade

#### LOAD AND JOIN PUBLIC EXPENDITURE DATA ####

# Load necessary library
library(data.table) # Using data.table for faster file reading

# Define the path to the folder containing the CSV files
folder_path <- "Source data/Public expenditure/conjunto_de_datos"

# Define a pattern to match files ending with the years 2011 to 2019
year_pattern <- paste0("(", paste(2011:2019, collapse = "|"), ")\\.csv$")

# Get a list of all CSV files in the folder that match the year pattern
csv_files <- list.files(path = folder_path, pattern = year_pattern, full.names = TRUE)

# Load all matching CSV files into a list
csv_list <- lapply(csv_files, fread) # fread is used from data.table package for faster reading

combined_data <- rbindlist(csv_list)
gc()

spend <- combined_data
rm(combined_data, csv_list)
gc()

spend$ID_ENTIDAD <- sprintf("%02d", spend$ID_ENTIDAD)
spend$ID_MUNICIPIO <- sprintf("%03d", spend$ID_MUNICIPIO)
spend$INEGI <- paste0(spend$ID_ENTIDAD, spend$ID_MUNICIPIO)
spend$category <- paste0(spend$TEMA, spend$CATEGORIA, spend$DESCRIPCION_CATEGORIA)
spend$category <- gsub(" ", "", spend$category)
gc()

spend <- spend[,c("INEGI", "ANIO", "VALOR", "category")]
gc()

spend <- spend[spend$INEGI %in% df$INEGI,]

check <- spend %>% 
  dplyr::group_by(INEGI, ANIO, category) %>%
  dplyr::summarise(n = dplyr::n(), .groups = "drop") %>%
  dplyr::filter(n > 1L) 

spend <- spend[!spend$category %in% check$category,]

rm(check)
gc()

wide_format <- spend %>%
  pivot_wider(values_from="VALOR", names_from = "category")

rm(spend)
gc()

#summarize up to municipal level
variables <- colnames(wide_format)[3:52]
average_data <- wide_format %>%
  group_by(INEGI) %>%
  summarize_at(variables, mean, na.rm = TRUE)

spend <- average_data

rm(average_data, wide_format)
gc()


#### MEAN TRENDS PLOT PREP - FINDING US CO-TREATMENT GROUP ####

#separate the panels
min_time <- df %>% 
  group_by(INEGI) %>% 
  summarize(min_time = min(time))
panel1 <- min_time[min_time$min_time==as.Date("2011-01-01"),]
panel2 <- min_time[min_time$min_time==as.Date("2014-01-01"),]
panel1 <- df[df$INEGI %in% panel1$INEGI,]
panel2 <- df[df$INEGI %in% panel2$INEGI,]

unique(df$treat_group)

#figure out date where most units get treated by exposure to us trade

ustrade_means <- df %>% 
  group_by(INEGI) %>% 
  summarize(ustrade_mean = mean(ustrade))

ustrade_start <- df %>% 
  group_by(INEGI) %>% 
  filter(ustrade==1) %>% 
  summarize(start_date = min(time))

ustrade_means <- ustrade_means[ustrade_means$ustrade_mean>0 & ustrade_means$ustrade_mean<1,]
ustrade_start <- ustrade_start[ustrade_start$INEGI %in% ustrade_means$INEGI,]
table(ustrade_start$start_date)
us_treat_time <- as.Date("2016-06-01")
ustrade_start <- ustrade_start[ustrade_start$start_date==us_treat_time,]

#subset to those units
us_treat_units <- ustrade_start$INEGI

#### FIND MATCHED PAIRS ####

matched_pairs <- read.csv("Outputs/genetic_matched_pairs.csv", stringsAsFactors = F)

# Filter the matched pairs for the specific treated municipalities
specific_matches <- matched_pairs[matched_pairs$treated_INEGI %in% us_treat_units, ]

matches <- c(specific_matches$treated_INEGI, specific_matches$control_INEGI)

length(unique(matches))

#### LINEAR TRENDS PLOT ####


#pick colors
display.brewer.all(n=NULL, type="all", select=NULL, exact.n=TRUE)
colors <- brewer.pal(n=9, name="Pastel1")
white <- "white"
grey <- "gray85"
red <- colors[1]
green <- colors[3]
darkred <- brewer.pal(n=9, name="Reds")[5]
darkgreen <- brewer.pal(n=9, name="Greens")[5]
black <- "black"

panel <- df[df$INEGI %in% matches,]

panel$treat_group_sub <- NA_character_
panel$treat_group_sub <- "Genetic Matched Controls"
panel$treat_group_sub[panel$INEGI %in% us_treat_units] <- "Michoacan Nine"

means1 <- panel %>% 
  group_by(time, treat_group_sub) %>% 
  summarize(mean = mean(malicious_homicides, na.rm=T))

nums <- 12
ggplot(data=means1, aes(x=time, y=mean))+
  geom_line(aes(color=treat_group_sub))+
  scale_color_manual(values=c("darkgrey", "blue"))+
  geom_vline(xintercept=as.Date(us_treat_time), linetype=2)+
  theme_classic(base_size=15)+
  xlab("Time")+
  ylab("Mean value of monthly intentional homicides, \nmunicipal level")+
  guides(color = guide_legend(title = NULL))+
  theme(legend.position = c(0.3, 0.65), axis.text = element_text(size = nums), legend.text = element_text(size = nums))

h <- 6
w <- 1.618*h
ggsave("Outputs/plots/Fig5_means_trend_plot.pdf", width=w, height=h)

#

#### MAP OF TREAT_GROUP ####

library(ggplot2)
library(ggspatial)
library(ggpattern)

#create data of treat_group by municipality
map_data <- panel[,c("INEGI", "treat_group_sub")]
map_data <- map_data %>% 
  distinct(INEGI, .keep_all=T)

#municipalities shape file
mun <- st_read("Source data/Map files/00mun.shp")
st_crs(mun)
#states shape file
state <- st_read("Source data/Map files/Entidades_2010_5.shp")

#MERGE QUERY AND MUN DATASETS
mun <- left_join(mun, map_data, by = c("CVEGEO"="INEGI"))

mun$treat_group_sub[is.na(mun$treat_group_sub)] <- "Missing data or excluded"

#relevel treat group factor
map_data$treat_group_sub <- factor(map_data$treat_group_sub, levels=c("Missing data", "Control", "Export certified before 2011", "Export certified between 2011-2020"))


#add centroids so you can plot state names
state <- cbind(state, st_coordinates(st_centroid(state)))
#rename some states
state$NOM_ENT <- as.character(state$NOM_ENT)
state$NOM_ENT
state$NOM_ENT[3] <- "Mexico"
state$NOM_ENT[14] <- "Michoacan"
state$NOM_ENT[21] <- "Queretaro"
state$NOM_ENT[22] <- "San Luis Potosi"
state$NOM_ENT[27] <- "Nuevo Leon"
state$NOM_ENT[30] <- "Yucatan"
state$NOM_ENT[26] <- "Veracruz"
state$NOM_ENT[28] <- "Coahuila"
state$NOM_ENT[1] <- ""

mun <- st_transform(mun, crs=6372)
st_crs(mun)
state <- st_transform(state, crs=6372)
st_crs(state)

h <- 6
w <- 1.618*h
#create pdf of map with states and municipalities and query presence in 2010
#Plot the data
ggplot() +
  ggspatial::annotation_spatial(mun) +
  ggspatial::layer_spatial(mun, aes(fill=treat_group_sub), color="gray14", linewidth=.05)+
  scale_fill_manual(values=c(grey, darkgreen, white))+
  geom_sf(data = state, fill=NA, color=black, linewidth=.4) +
  geom_text(data = state, aes(X, Y, label = NOM_ENT), size = 4)+
  theme_void()+
  coord_sf(xlim = c(2200000, 2800000), ylim = c(600000, 950000), expand = FALSE)+
  theme(legend.position = c(0.2, 0.2), legend.title=element_blank(), legend.key=element_blank())
ggsave("Outputs/plots/Fig12_Mexico_state_mun_treat_groups_mich_nine.png", width=w, height=h)

#
